• 请在环境变量中设置DB_URI指向数据库

In [8]:
import os
import numpy as np
import pandas as pd
from cvxpy import *
from cvxopt import *
from alphamind.api import *
from alphamind.cython.optimizers import QPOptimizer

Data Preparing



In [9]:
risk_penlty = 0.5
ref_date = '2018-02-08'

engine = SqlEngine(os.environ['DB_URI'])
universe = Universe('ashare_ex')
codes = engine.fetch_codes(ref_date, universe)

risk_cov, risk_exposure = engine.fetch_risk_model(ref_date, codes)
factor = engine.fetch_factor(ref_date, 'EPS', codes)

total_data = pd.merge(factor, risk_exposure, on='code').dropna()
all_styles = risk_styles + industry_styles + macro_styles

risk_exposure_values = total_data[all_styles].values.astype(float)
special_risk_values = total_data['srisk'].values.astype(float)
risk_cov_values = risk_cov[all_styles].values

sec_cov_values_full = risk_exposure_values @ risk_cov_values @ risk_exposure_values.T / 10000  + np.diag(special_risk_values ** 2) / 10000
signal_full = total_data['EPS'].values

In [10]:
n = 200

sec_cov_values = sec_cov_values_full[:n, :n]
signal = signal_full[:n]

Optimizing Weights



In [11]:
%%time
w = Variable(n)

lbound = 0.
ubound = 1. / n * 20

risk = sum_squares(multiply(special_risk_values[:n] / 100., w)) + quad_form((w.T * risk_exposure_values[:n]).T, risk_cov_values[:n, :n] / 10000.)

objective = Minimize(risk_penlty * risk  - signal * w)
constraints = [w >= lbound,
               w <= ubound,
               sum(w) == 1,]

prob = Problem(objective, constraints)


Wall time: 2.99 ms

In [12]:
%%time
prob.solve(verbose=True)


-----------------------------------------------------------------
           OSQP v0.4.1  -  Operator Splitting QP Solver
              (c) Bartolomeo Stellato,  Goran Banjac
        University of Oxford  -  Stanford University 2018
-----------------------------------------------------------------
problem:  variables n = 439, constraints m = 640
          nnz(P) + nnz(A) = 4419
settings: linear system solver = qdldl,
          eps_abs = 1.0e-03, eps_rel = 1.0e-03,
          eps_prim_inf = 1.0e-04, eps_dual_inf = 1.0e-04,
          rho = 1.00e-01 (adaptive),
          sigma = 1.00e-06, alpha = 1.60, max_iter = 4000
          check_termination: on (interval 25),
          scaling: on, scaled_termination: off
          warm start: on, polish: on

iter  objective    pri res    dua res    rho        time
   1  -6.4413e+01   5.65e+00   6.37e+02   1.00e-01   1.36e-03s
 200  -2.4008e+00   4.49e-06   1.89e-03   1.55e+00   5.62e-03s
plsh  -2.4003e+00   5.55e-16   1.37e-14  ---------   6.38e-03s

status:               solved
solution polish:      successful
number of iterations: 200
optimal objective:    -2.4003
run time:             6.38e-03s
optimal rho estimate: 2.29e-01

Wall time: 29.9 ms
Out[12]:
-2.4003282365506444

In [13]:
prob.status, prob.value


Out[13]:
('optimal', -2.4003282365506444)

In [14]:
%%time
prob.solve(verbose=True, solver='ECOS')


ECOS 2.0.7 - (C) embotech GmbH, Zurich Switzerland, 2012-15. Web: www.embotech.com/ECOS

It     pcost       dcost      gap   pres   dres    k/t    mu     step   sigma     IR    |   BT
 0  -3.332e-01  -1.048e+02  +2e+03  9e-01  1e-02  1e+00  5e+00    ---    ---    1  1  - |  -  - 
 1  -3.461e-01  -1.811e+01  +1e+03  2e-01  2e-03  8e-01  3e+00  0.5430  3e-01   1  1  1 |  0  0
 2  -2.875e+00  -4.039e+00  +2e+02  1e-02  9e-05  9e-02  4e-01  0.9297  6e-02   1  1  1 |  0  0
 3  -2.573e+00  -2.939e+00  +5e+01  5e-03  3e-05  3e-02  1e-01  0.7733  9e-02   1  1  2 |  0  0
 4  -2.449e+00  -2.553e+00  +1e+01  1e-03  8e-06  6e-03  3e-02  0.7708  6e-02   1  1  1 |  0  0
 5  -2.407e+00  -2.418e+00  +2e+00  1e-04  9e-07  6e-04  4e-03  0.9066  2e-02   1  1  1 |  0  0
 6  -2.402e+00  -2.404e+00  +3e-01  3e-05  2e-07  1e-04  7e-04  0.8248  1e-02   1  1  2 |  0  0
 7  -2.401e+00  -2.401e+00  +6e-02  6e-06  4e-08  9e-06  2e-04  0.9466  2e-01   2  2  2 |  0  0
 8  -2.400e+00  -2.400e+00  +5e-03  4e-07  3e-09  7e-07  1e-05  0.9237  1e-03   1  1  1 |  0  0
 9  -2.400e+00  -2.400e+00  +2e-04  2e-08  1e-10  2e-08  5e-07  0.9890  3e-02   1  1  1 |  0  0
10  -2.400e+00  -2.400e+00  +2e-06  2e-10  1e-12  2e-10  5e-09  0.9890  1e-04   1  1  1 |  0  0
11  -2.400e+00  -2.400e+00  +2e-08  2e-12  1e-14  2e-12  6e-11  0.9890  1e-04   2  1  1 |  0  0

OPTIMAL (within feastol=2.2e-12, reltol=9.8e-09, abstol=2.4e-08).
Runtime: 0.011237 seconds.

Wall time: 32.9 ms
Out[14]:
-2.400328236659518

In [15]:
prob.status, prob.value


Out[15]:
('optimal', -2.400328236659518)

In [16]:
%%time
P = matrix(sec_cov_values)
q = -matrix(signal)

G = np.zeros((2*n, n))
h = np.zeros(2*n)
for i in range(n):
    G[i, i] = 1.
    h[i] = 1. / n * 20
    G[i+n, i] = -1.
    h[i+n] = 0.
    
G = matrix(G)
h = matrix(h)
    
A = np.ones((1, n))
b = np.ones(1)

A = matrix(A)
b = matrix(b)

sol = solvers.qp(P, q, G, h, A, b)


     pcost       dcost       gap    pres   dres
 0: -4.0275e+01 -8.9373e+01  8e+03  6e+01  6e-16
 1: -2.7029e+00 -8.3780e+01  2e+02  1e+00  2e-15
 2: -1.3699e+00 -2.0914e+01  2e+01  3e-15  3e-15
 3: -1.6193e+00 -6.3167e+00  5e+00  5e-16  2e-15
 4: -1.8992e+00 -4.2870e+00  2e+00  5e-16  1e-15
 5: -2.1306e+00 -3.2594e+00  1e+00  4e-16  8e-16
 6: -2.1625e+00 -2.9783e+00  8e-01  3e-16  6e-16
 7: -2.2529e+00 -2.6835e+00  4e-01  3e-16  6e-16
 8: -2.3100e+00 -2.5413e+00  2e-01  1e-15  5e-16
 9: -2.3407e+00 -2.4723e+00  1e-01  8e-16  5e-16
10: -2.3953e+00 -2.4100e+00  1e-02  4e-16  1e-15
11: -2.4002e+00 -2.4005e+00  2e-04  2e-16  9e-16
12: -2.4003e+00 -2.4003e+00  2e-06  2e-16  9e-16
13: -2.4003e+00 -2.4003e+00  2e-08  2e-16  9e-16
Optimal solution found.
Wall time: 23.9 ms

In [17]:
%%time
lbound = np.zeros(n)
ubound = np.ones(n) * 20 / n
cons_matrix = np.ones((1, n))
clb = np.ones(1)
cub = np.ones(1)
qpopt = QPOptimizer(signal,
                    None,
                    lbound,
                    ubound,
                    cons_matrix,
                    clb,
                    cub,
                    1.,
                    risk_cov_values[:n, :n] / 10000.,
                    risk_exposure_values[:n],
                    special_risk_values[:n] * special_risk_values[:n] / 10000.)
qpopt.feval()
qpopt.status()


Wall time: 20.9 ms

Performace Timing



In [18]:
import datetime as dt

In [19]:
def time_function(py_callable, n):
    start = dt.datetime.now()
    val = py_callable(n)
    return (dt.datetime.now() - start).total_seconds(), val

In [20]:
def cvxpy(n):
    w = Variable(n)

    lbound = 0.
    ubound = 0.01
    
    risk = sum_squares(multiply(special_risk_values[:n] / 100., w)) + quad_form((w.T * risk_exposure_values[:n]).T, risk_cov_values[:n, :n] / 10000.)

    objective = Minimize(risk_penlty * risk  - signal * w)
    constraints = [w >= lbound,
                   w <= ubound,
                   sum(w) == 1,]

    prob = Problem(objective, constraints)
    prob.solve(verbose=False, solver='ECOS')
    return prob.value

In [21]:
def cvxopt(n):
    P = matrix(sec_cov_values)
    q = -matrix(signal)

    G = np.zeros((2*n, n))
    h = np.zeros(2*n)
    for i in range(n):
        G[i, i] = 1.
        h[i] = 0.01
        G[i+n, i] = -1.
        h[i+n] = 0.

    G = matrix(G)
    h = matrix(h)

    A = np.ones((1, n))
    b = np.ones(1)

    A = matrix(A)
    b = matrix(b)
    
    solvers.options['show_progress'] = False
    sol = solvers.qp(P, q, G, h, A, b)
    return sol['primal objective']

In [22]:
def ipopt(n):
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.01
    cons_matrix = np.ones((1, n))
    clb = np.ones(1)
    cub = np.ones(1)
    qpopt = QPOptimizer(signal, None, lbound, ubound, cons_matrix, clb, cub, 1.,
                        risk_cov_values[:n, :n] / 10000.,
                        risk_exposure_values[:n],
                        special_risk_values[:n] * special_risk_values[:n] / 10000.)
    return qpopt.feval()

In [23]:
n_steps = list(range(200, 3201, 200))
cvxpy_times = [None] * len(n_steps)
cvxopt_times = [None] * len(n_steps)
ipopt_times = [None] * len(n_steps)
print("{0:<8}{1:>12}{2:>12}{3:>12}".format('Scale(n)', 'cvxpy', 'cvxopt', 'ipopt'))

for i, n in enumerate(n_steps):
    sec_cov_values = sec_cov_values_full[:n, :n]
    signal = signal_full[:n]
    cvxpy_times[i], val1 = time_function(cvxpy, n)
    cvxopt_times[i], val2 = time_function(cvxopt, n)
    ipopt_times[i], val3 = time_function(ipopt, n)
    
    np.testing.assert_almost_equal(val1, val2, 4)
    np.testing.assert_almost_equal(val2, val3, 4)
    
    print("{0:<8}{1:>12.4f}{2:>12.4f}{3:>12.4f}".format(n, cvxpy_times[i], cvxopt_times[i], ipopt_times[i]))


Scale(n)       cvxpy      cvxopt       ipopt
200           0.0340      0.0109      0.0120
400           0.0399      0.0469      0.0199
600           0.0519      0.1606      0.0319
800           0.0708      0.5037      0.0519
1000          0.1027      0.9594      0.0469
1200          0.1406      1.6586      0.0539
D:\ProgramData\anaconda3\lib\site-packages\cvxpy-1.0.10-py3.6-win-amd64.egg\cvxpy\problems\problem.py:614: RuntimeWarning: overflow encountered in long_scalars
  if self.max_big_small_squared < big*small**2:
1400          0.1386      2.7945      0.0728
1600          0.1586      3.8437      0.0987
D:\ProgramData\anaconda3\lib\site-packages\cvxpy-1.0.10-py3.6-win-amd64.egg\cvxpy\problems\problem.py:615: RuntimeWarning: overflow encountered in long_scalars
  self.max_big_small_squared = big*small**2
1800          0.2224      8.4165      0.1885
2000          0.3022     10.5777      0.1905
2200          0.3201     15.0518      0.1676
2400          0.3282     17.1641      0.2045
2600          0.3391     23.3546      0.1985
2800          0.4807     29.7235      0.2424
3000          0.4199     39.5318      0.2804
3200          0.5067     53.5702      0.2586

In [ ]: